# table 4: outcomes for nsw

library(tidyverse)
library(readxl)
library(geepack)

data_directory = "Z:/LowValueCare/APDC/on-ny-nsw"

##### covariate functions #####
# centre covariates on the mean ontario age and hospital volume
adjust_covariates <- function(cohort, mean.ontario.age, mean.ontario.volume){
  cohort %>%
    mutate(age_centred = age - mean.ontario.age,
           volume_centred = av_proc_year - mean.ontario.volume) -> cohort
  return(cohort)
}

##### match 2016 neighbourhood income ####

sa2_cor = read_xlsx("sa2_correspondance.xlsx")
abs_income = read_xlsx("income_sa2.xlsx")
abs_income %>%
  select(sa2,quintile_personal) %>%
  right_join(sa2_cor,by=c("sa2"="sa2_maincode_2016")) -> sa2_cor
# note missing values here are because the sa2_cor file is for all Australia, not just NSW (not included in the income file)
sa2_cor %>%
  filter(sa2 %in% abs_income$sa2) -> sa2_cor

# convert the sa2 2011 to income quintiles which are based on sa2 2016 boundaries
sa2_cor %>%
  select(sa2_maincode_2011,quintile_personal,ratio) %>%
  distinct() %>% 
  group_by(sa2_maincode_2011,quintile_personal) %>%
  summarise(ratio = sum(ratio)) -> sa2_cor

### function: join to cohort table
sa2_income <- function(cohort,sa2_cor){
  cohort %>%
    left_join(sa2_cor,by=c("SA2_2011_CODE" = "sa2_maincode_2011")) -> cohort_income
  
  cohort_income %>%
    group_by(recnum) %>%
    summarise(multiple_quintiles = n()) %>%
    filter(multiple_quintiles>1) %>%
    nrow() -> multiple_quintiles
  
  cohort_income %>% 
    group_by(recnum) %>%
    summarise(test = all(is.na(ratio)),
              neighbourhood_income = ifelse(test==TRUE,
                                            as.numeric(NA),
                                            quintile_personal[which.max(ratio)])) %>%
      ungroup() -> cohort_income
  
  print(paste0("There were ",multiple_quintiles," records (", 
               round(multiple_quintiles/nrow(cohort)*100,digits=1),
               "%) where the SA2 2011 code corresponded to more than one income quintile."))
  
  cohort %>%
    left_join(cohort_income,by=c("recnum" = "recnum")) %>% 
    mutate(neighbourhood_income = factor(neighbourhood_income)) %>%
    mutate(neighbourhood_income = fct_relevel(neighbourhood_income,"3")) -> cohort
  
  return(cohort)
}

##### unadjusted outcomes #####
### hospital length of stay ###
los_unadjusted <- function(cohort){
  cohort %>%
    summarise(los_mean = mean(los_eoc[los_eoc < 100]),
              los_median = median(los_eoc),
              los_100perc = sum(los_eoc>100),
              los_iqr = paste0(round(quantile(los_eoc,.25),2),", ", round(quantile(los_eoc,.75),2)),
              los_sd = sd(los_eoc[los_eoc < 100]),
              los_se = los_sd/sqrt(n())) -> los_procedure
  return(los_procedure)
}

### binary outcomes ###
binary_outcomes <- function(cohort){
  cohort %>%
    mutate_at(vars(death_episode,death_episode_7days,discharge_home,discharge_longterm,discharge_other),
              as.logical) -> cohort
  return(cohort)
}

binary_unadjusted <- function(cohort,outcome.variable){
  outcome = quo(!!rlang::sym(outcome.variable)) 
  if (outcome.variable %in% c("readmit_30","readmit_90")){
    cohort <- cohort %>%
      filter(death_episode != 1) 
  }
  cohort %>%
    summarise(var.sum = sum(!!outcome),
              var.prop = var.sum/n(),
              var.se = sqrt(var.prop*(1-var.prop)/n())) -> bin_procedure
  
  return(bin_procedure)
}

##### adjusted outcomes #####

## model 1: age and sex adjusted

# length of stay
model1_los <- function(cohort,procedure){
  test_data <- cohort %>%
    select(ppn,los_eoc,age_centred,sex)
  test_order <- order(as.integer(test_data$ppn))
  test_data <- test_data[test_order,]
  test_data <- test_data %>% filter(los_eoc < 100)
  if (procedure == "prostat"){
    M1 <- gee(los_eoc ~ age_centred, id = ppn, family = gaussian,
                 corstr = "independence",data = test_data) # RP had no repeated patients
  } else {
    M1 <- geeglm(los_eoc ~ age_centred + sex, id = ppn, family = gaussian,
                 corstr = "exchangeable",data = test_data)
  }
  return(M1)
}

# binary
model1_bin <- function(cohort,var_response,procedure){
  response <- quo(!!rlang::sym(var_response)) 
  test_data <- cohort %>%
    select(ppn,!!response,age_centred,sex,death_episode) %>%
    rename(response_col = !!response)
  # exclude those who died in hospital from the readmission
  if (var_response %in% c("readmit_30","readmit_90")){
    test_data <- test_data %>%
      filter(death_episode != 1)
  }
  test_order <- order(as.integer(test_data$ppn))
  test_data <- test_data[test_order,]
  if (procedure == "prostat"){ # this has no repeated ppns
    M1 <- glm(response_col ~ age_centred,family = "binomial",data = test_data)
  } else {
    M1 <- geeglm(response_col ~ age_centred + sex, id = ppn, 
                       family = "binomial",corstr = "exchangeable",data = test_data)
  }
  
  return(M1)
}

## model 2: age and sex and neighbourhood income and hospital volume

model2_los <- function(cohort,procedure){
  test_data <- cohort %>%
    select(ppn,los_eoc,age_centred,sex,volume_centred,neighbourhood_income)
  test_order <- order(as.integer(test_data$ppn))
  test_data <- test_data[test_order,]
  test_data <- test_data %>% filter(los_eoc < 100)
  test_data <- test_data %>% drop_na()
  if (procedure == "prostat"){
    M1 <- glm(los_eoc ~ age_centred + neighbourhood_income + volume_centred, family = gaussian,
              data = test_data)
  } else {
    M1 <- geeglm(los_eoc ~ age_centred + sex + neighbourhood_income + volume_centred, 
              id = ppn, family = gaussian,
              corstr = "exchangeable",data = test_data)
  }
  return(M1)
}

# binary
model2_bin <- function(cohort,var_response,procedure){
  response <- quo(!!rlang::sym(var_response)) 
  test_data <- cohort %>%
    select(ppn,!!response,age_centred,sex,death_episode,volume_centred,neighbourhood_income) %>%
    rename(response_col = !!response)
  # exclude those who died in hospital from the readmission
  if (var_response %in% c("readmit_30","readmit_90")){
    test_data <- test_data %>%
      filter(death_episode != 1)
  }
  test_order <- order(as.integer(test_data$ppn))
  test_data <- test_data[test_order,]
  test_data <- test_data %>% drop_na()
  if (procedure == "prostat"){
    M1 <- glm(response_col ~ age_centred + neighbourhood_income + volume_centred, 
                       family = "binomial",data = test_data),
                   
  } else {
    M1 <- geeglm(response_col ~ age_centred + sex + neighbourhood_income + volume_centred, id = ppn, 
                       family = "binomial",corstr = "exchangeable",data=test_data),
                   
  }
  
  return(M1)
}
